## this script estimates the main model in Table 3 and generates the substantive effects /:/ ##
## the loop also generates the models for the partisan hypothesis evaluation discussed in the  ##
## section labeled 'considering a competing explanation' /:/ ##

## please note that this script estimates the main model 1000 times ##
## and this estimation is computationally intensive and time consuming ##
## also, it goes without saying that your results will not perfectly match the those reported in the article ##

##clean up and set working directory##
	
	rm(list = ls(all=TRUE))
	
	setwd('')

## load packages ##

	library(betareg)
	library(mnormt)
	library(pscl)
	library(wnominate)
	library(apsrtable)
	library(plotrix)
	library(car)
	library(lme4)
	library(hglm)

	comMean <- c(0.04440807, 0.12450706, 0.04202934, 0.06657370, 0.06613406,
		0.07708839, 0.09642218, 0.06236180, 0.13878339, 0.05179651,
		0.05047974, 0.05792678, 0.05364169, 0.06784731)

	comSd <- c(0.004065244, 0.018311729, 0.006157834, 0.009201509, 0.006028053,
		0.007011538, 0.007806288, 0.003835676, 0.024776458, 0.005260034,
		0.005218387, 0.006655074, 0.004088974, 0.009828270)

## Analysis ##

	data <- read.table("data.txt", header=TRUE, sep="|")

	data <- data[is.na(data$errorRateIndWn)==FALSE, ]
	data <- data[data$totalVotesWn > 0, ]

	data1 <- data[data$congress<104, ]
	data2 <- data[data$congress>103, ]

	data1$maj <- recode(data1$partyCode, "100=1; else=0")
	data2$maj <- recode(data2$partyCode, "200=1; else=0")
	
	data <- rbind(data1, data2)
	data$min <- recode(data$maj, "1=0; 0=1")
	
## assign outlier status and directionality to each individual ##
	
	data$outlierDir <- c()
	dataMaj <- data[data$maj == 1, ]
	dataMin <- data[data$maj == 0, ]
	dataMaj$outlierDir <- dataMaj$majOutlier
	dataMin$outlierDir <- dataMin$minOutlier
	data <- rbind(dataMaj, dataMin)

	data$outlierDir2 <- data$outlierDir^2

	data$votesIndWn <- data$correctWn + data$incorrectWn

	write.table(data, file="merged.txt", na="NA", sep="|")

## read in matrix of new ideal points ##

	cong <- sort(unique(data$congress))
	com <- sort(unique(data$committee))
	simCoef <- matrix(NA, ncol = 18, nrow = 0)
	simMajMin <- matrix(NA, ncol = 18, nrow = 0)
	names <- matrix(NA, ncol = length(unique(data$name)), nrow = 0)
	congress <- matrix(NA, ncol = length(unique(data$congress)), nrow = 0)
	committee <- matrix(NA, ncol = length(unique(data$committee)), nrow = 0)
	majStrength <- matrix(NA, ncol = length(unique(data$congress)), nrow = 0)
	data$newCoord <- rep(NA, length(data$coord))

## this short loop generates new ideal points from the idividual distributions ##
## to model error in the rank ordering estimate ##

	mat <- matrix(NA, nrow = length(data$coord), ncol = 1000)
	
	for(i in 1:1000){
		for(k in 1:length(data$coord)){
			mat[k, i] <- rnorm(1, data$coord[k], data$se[k])
		}
		cat('iteration', i,'of 1,000 ideal point generations complete...\n')
	}
	

## this is the main loop estimating the models ##

	for(i in 1:1000){

		begin <- proc.time()[3]

	## impute the new ideal point estimates ##
	
		data$newCoord <- mat[, i]

	## impute the committee staff estimates ##
	
		for(c in 1:length(cong)){
			for(k in 1:length(com)){
				data$comStaffInd[data$committee == com[k] & data$congress == cong[c]] <- rnorm(length(data$comStaffInd[data$committee == com[k] & data$congress == cong[c]]), comMean[k], comSd[k])
				data$rankCenteredWn[data$committee == com[k] & data$congress == cong[c]] <- round(abs(rank(data$newCoord[data$committee == com[k] & data$congress == cong[c]]) - 
					(length(data$newCoord[data$committee == com[k] & data$congress == cong[c]])) / 2))
			}	
		}

	## estimate the main model ##

		model <- glmer(errorRateIndWn ~ maj*((outlierDir + outlierDir2)*member + comStaffInd + majStack) + rankCenteredWn + dimension + (1 | name) + (1 | committee) + (majStrengthInd | as.factor(data$congress)), data = data)

	## simulate 100 parameter estimates from the posterior ##
	
		sims <- rmnorm(n = 100, fixef(model), as.matrix(vcov(model)))
		simCoef <- rbind(simCoef, sims)
	
	## grab the random intercept estimates and log ##

		holdName <- ranef(model)$name[, 1]
		names <- rbind(names, holdName)
		congHold <- ranef(model)$'as.factor(data$congress)'[, 1]
		congress <- rbind(congress, congHold)
		majHold <- ranef(model)$'as.factor(data$congress)'[, 2]
		majStrength <- rbind(majStrength, majHold)
		comHold <- ranef(model)$committee[, 1]
		committee <- rbind(committee, comHold)
		
	## this model examines only congresses before the 104th to evaluate the partisan explanation ##
		
		modelMajMin <- glmer(errorRateIndWn ~ maj*((outlierDir + outlierDir2)*member + comStaffInd + majStack) + rankCenteredWn + dimension + (1 | name) + (1 | committee) + (majStrengthInd | as.factor(data$congress[data$congress < 104])), data = data[data$congress < 104, ])
	
	## take 100 draws ##

		majMin <- rmnorm(n = 100, fixef(modelMajMin), as.matrix(vcov(modelMajMin)))
		simMajMin <- rbind(simMajMin, majMin)

		cat('model estimation', i, 'of 1,000 complete...\n this estimate took', ((proc.time()[3]-begin)/60), 'minutes to converge...\n\n')
	}

## writes the paramter estimates to a new file which will be used to evaluate our hypotheses ##
## and make the graphics below ##

	write.table(simCoef, file="simCoef.txt", append=FALSE, na="NA", sep="|")
	write.table(simMajMin, file="simMaj.txt", append=FALSE, na="NA", sep="|")
	write.table(names, file="names.txt", append=FALSE, na="NA", sep="|", row.names = FALSE)
	write.table(congress, file="congress.txt", append=FALSE, na="NA", sep="|", row.names = FALSE)
	write.table(majStrength, file="majStrength.txt", append=FALSE, na="NA", sep="|", row.names = FALSE)
	write.table(committee, file="committee.txt", append=FALSE, na="NA", sep="|", row.names = FALSE)


## substantive effects shown in Figure 2 ##

	simCoef <- read.table("simCoef.txt", sep = '|')
	summary(simCoef)

	range <- seq(from = 0, to = 1, by = 0.01)
	
	majHi <- c()
	majMed <- c()
	majLo <- c()
	minHi <- c()
	minMed <- c()
	minLo <- c()
	majMemHi <- c()
	majMemMed <- c()
	majMemLo <- c()
	minMemHi <- c()
	minMemMed <- c()
	minMemLo <- c()
	diffMajHi <- c()
	diffMajMed <- c()
	diffMajLo <- c()
	diffMinHi <- c()
	diffMinMed <- c()
	diffMinLo <- c()
	
	for(i in 1:length(range)){
		min <- simCoef[, 1] + 
			simCoef[, 2]*0 + 
			simCoef[, 3]*range[i] + 
			simCoef[, 4]*(range[i]^2) + 
			simCoef[, 5]*0 + 
			simCoef[, 6]*mean(data$comStaffInd, na.rm=TRUE) +
			simCoef[, 7]*mean(data$majStack, na.rm=TRUE) +
			simCoef[, 8]*mean(data$rankCenteredWn, na.rm=TRUE) +
			simCoef[, 9]*mean(data$dimension, na.rm=TRUE) +
			simCoef[, 10]*range[i]*0 + 
			simCoef[, 11]*(range[i]^2)*0 + 
			simCoef[, 12]*range[i]*0 + 
			simCoef[, 13]*(range[i]^2)*0 + 
			simCoef[, 14]*0 + 
			simCoef[, 15]*mean(data$comStaffInd, na.rm=TRUE)*0 +
			simCoef[, 16]*mean(data$majStack, na.rm=TRUE)*0 +
			simCoef[, 17]*range[i]*0 + 
			simCoef[, 18]*(range[i]^2)*0
			
		maj <- simCoef[, 1] + 
			simCoef[, 2]*1 + 
			simCoef[, 3]*range[i] + 
			simCoef[, 4]*(range[i]^2) + 
			simCoef[, 5]*0 + 
			simCoef[, 6]*mean(data$comStaffInd, na.rm=TRUE) + 
			simCoef[, 7]*mean(data$majStack, na.rm=TRUE) + 
			simCoef[, 8]*mean(data$rankCenteredWn, na.rm=TRUE) +
			simCoef[, 9]*mean(data$dimension, na.rm=TRUE) +
			simCoef[, 10]*range[i]*0 + 
			simCoef[, 11]*(range[i]^2)*0 + 
			simCoef[, 12]*range[i]*1 + 
			simCoef[, 13]*(range[i]^2)*1 + 
			simCoef[, 14]*0 + 
			simCoef[, 15]*mean(data$comStaffInd, na.rm=TRUE) + 
			simCoef[, 16]*mean(data$majStack, na.rm=TRUE) + 
			simCoef[, 17]*range[i]*0 + 
			simCoef[, 18]*(range[i]^2)*0

		minMem <- simCoef[, 1] + 
			simCoef[, 2]*0 + 
			simCoef[, 3]*range[i] + 
			simCoef[, 4]*(range[i]^2) + 
			simCoef[, 5]*1 + 
			simCoef[, 6]*mean(data$comStaffInd, na.rm=TRUE) + 
			simCoef[, 7]*mean(data$majStack, na.rm=TRUE) + 
			simCoef[, 8]*mean(data$rankCenteredWn, na.rm=TRUE) +
			simCoef[, 9]*mean(data$dimension, na.rm=TRUE) +
			simCoef[, 10]*range[i]*1 + 
			simCoef[, 11]*(range[i]^2)*1 + 
			simCoef[, 12]*range[i]*0 + 
			simCoef[, 13]*(range[i]^2)*0 + 
			simCoef[, 14]*0 + 
			simCoef[, 15]*mean(data$comStaffInd, na.rm=TRUE)*0 + 
			simCoef[, 16]*mean(data$majStack, na.rm=TRUE)*0 + 
			simCoef[, 17]*range[i]*0 + 
			simCoef[, 18]*(range[i]^2)*0

		majMem <- simCoef[, 1] + 
			simCoef[, 2]*1 + 
			simCoef[, 3]*range[i] + 
			simCoef[, 4]*(range[i]^2) + 
			simCoef[, 5]*1 + 
			simCoef[, 6]*mean(data$comStaffInd, na.rm=TRUE) + 
			simCoef[, 7]*mean(data$majStack, na.rm=TRUE) + 
			simCoef[, 8]*mean(data$rankCenteredWn, na.rm=TRUE) +
			simCoef[, 9]*mean(data$dimension, na.rm=TRUE) +
			simCoef[, 10]*range[i]*1 + 
			simCoef[, 11]*(range[i]^2)*1 + 
			simCoef[, 12]*range[i]*1 + 
			simCoef[, 13]*(range[i]^2)*1 + 
			simCoef[, 14]*1 + 
			simCoef[, 15]*mean(data$comStaffInd, na.rm=TRUE) + 
			simCoef[, 16]*mean(data$majStack, na.rm=TRUE) + 
			simCoef[, 17]*range[i]*1 + 
			simCoef[, 18]*(range[i]^2)*1
			
		majHi[i] <- quantile(maj, 0.975)
		majMed[i] <- quantile(maj, 0.5)
		majLo[i] <- quantile(maj, 0.025)
		minHi[i] <- quantile(min, 0.975)
		minMed[i] <- quantile(min, 0.5)
		minLo[i] <- quantile(min, 0.025)
		majMemHi[i] <- quantile(majMem, 0.975)
		majMemMed[i] <- quantile(majMem, 0.5)
		majMemLo[i] <- quantile(majMem, 0.025)
		minMemHi[i] <- quantile(minMem, 0.975)
		minMemMed[i] <- quantile(minMem, 0.5)
		minMemLo[i] <- quantile(minMem, 0.025)

		diffMajHi[i] <- quantile(maj - majMem, 0.975)
		diffMajMed[i] <- quantile(maj - majMem, 0.5)
		diffMajLo[i] <- quantile(maj - majMem, 0.025)
		diffMinHi[i] <- quantile(min - minMem, 0.975)
		diffMinMed[i] <- quantile(min - minMem, 0.5)
		diffMinLo[i] <- quantile(min - minMem, 0.025)
	}


	pdf(file = 'curvilinearDiff.pdf', width = 14, height = 14)
	par(mfrow = c(2, 1))
		plot(1, 1, type = 'n',
			xlim = c(0,1),
			ylim = c(-.015,.04),
			xlab = '',
			ylab = '',
			axes = FALSE)
		axis(2, line = -1)
		axis(4, line = -1)
		axis(1, at = seq(0, 1, .25))
		axis(1, at = c(0, .5, 1), labels = c('Extreme Outlier', 'Perfectly Representative', 'Moderate Outlier'), line = 1, col = 'white')
		polygon(c(0, 1, 1, 0), c(-.015, -.015, .04, .04), col = rgb(0, 0, 0, .05), border = NA)
		for(i in -3:9){
			lines(c(0, 1), c(i*.005, i*.005), col = 'white')
		}
		for(i in 1:20){
			lines(c(i*.05, i*.05), c(-.015,.04), col = 'white')
		}
		polygon(c(range, rev(range)), c(diffMajHi, rev(diffMajLo)), col = rgb(0, 0, 0, .3), border = NA)
		lines(range, diffMajMed, col = rgb(0, 0, 0, .3), lwd = 2)
		lines(c(0,1), c(0,0), col = rgb(0, 0, 0, .3), lwd = 3, lty = 3)
		mtext('Difference in Majority Floor and Committee Error Rates', side = 2, line = 1.5, cex = 1.2)
		mtext('Committee Representativeness', side = 1, line = 3.5, cex = 1.2)
		mtext('Effect of Majority Committee Composition on\nDifference in Majority Floor and Committee Member Error Rates', cex = 1.5)

		plot(1, 1, type = 'n',
			xlim = c(0,1),
			ylim = c(-.015,.04),
			xlab = '',
			ylab = '',
			axes = FALSE)
		axis(2, line = -1)
		axis(4, line = -1)
		axis(1, at = seq(0, 1, .25))
		axis(1, at = c(0, .5, 1), labels = c('Extreme Outlier', 'Perfectly Representative', 'Moderate Outlier'), line = 1, col = 'white')
		polygon(c(0, 1, 1, 0), c(-.015, -.015, .04, .04), col = rgb(0, 0, 0, .05), border = NA)
		for(i in -3:9){
			lines(c(0, 1), c(i*.005, i*.005), col = 'white')
		}
		for(i in 1:20){
			lines(c(i*.05, i*.05), c(-.015,.04), col = 'white')
		}
		polygon(c(range, rev(range)), c(diffMinHi, rev(diffMinLo)), col = rgb(0, 0, 0, .3), border = NA)
		lines(range, diffMinMed, col = rgb(0, 0, 0, .3), lwd = 2)
		lines(c(0,1), c(0,0), col = rgb(0, 0, 0, .3), lwd = 3, lty = 3)
		arrows(.87, -0.0065, .87, 0, length = .0625)
		text(.87, -0.009, 'breaking point\n0.87')
		mtext('Difference in Minority Floor and Committee Error Rates', side = 2, line = 1.5, cex = 1.2)
		mtext('Committee Representativeness', side = 1, line = 3.5, cex = 1.2)
		mtext('Effect of Minority Committee Composition on\nDifference in Minority Floor and Committee Member Error Rates', cex = 1.5)
	dev.off()
	

## now let's plot our paramter distributions ##

	parNames <- c('Intercept', 'Majority',  'Representativeness',  'Representativeness^2',  'Committee Member',  'Committee Staff',  'Maority Stacking',  'Median Distance',  'Dimensionality',  'Representativeness * Member',  'Representativeness^2 * Member', 'Majority * Representativeness',  'Majority * Representativeness^2', 'Majority * Member',  'Majority * Committee Staff',  'Majority * Majority Stacking',  'Representativeness * Member',  'Representativeness^2 * Member')

	pdf(file = 'fixedParameters.pdf', width = 16, height = 14)
		par(mfrow = c(4, 5))
		for(i in 1:18){
			plot(density(simCoef[, i]), type = 'n',
				xlab = '',
				ylab = '',
				axes = FALSE,
				main = '')
			axis(1)
			axis(2, at = c(min(density(simCoef[, i])$y), max(density(simCoef[, i])$y)), labels = c(0, 'Max'))
			polygon(density(simCoef[, i]), col = rgb(1, 0, 0, .4), border = rgb(1, 0, 0, .5), lwd = 2)
			mtext(parNames[i], line = 2, cex = 1.25)
		}
	dev.off()

	congress <- read.table("congress.txt", sep = '|')
	pdf(file = 'congress.pdf', width = 16, height = 14)
		par(mfrow = c(5, 5))
		for(i in 1:25){
			plot(density(as.numeric(paste(congress[, i])), na.rm = TRUE), type = 'n',
				xlab = '',
				ylab = '',
				axes = FALSE,
				main = '')
			axis(1)
			axis(2, at = c(min(density(as.numeric(paste(congress[, i])), na.rm = TRUE)$y), max(density(as.numeric(paste(congress[, i])), na.rm = TRUE)$y)), labels = c(0, 'Max'))
			polygon(density(as.numeric(paste(congress[, i])), na.rm = TRUE), col = rgb(1, 0, 0, .4), border = rgb(1, 0, 0, .5), lwd = 2)
			mtext(paste('Congress', 83+i), line = 2, cex = 1.25)
		}
	dev.off()

	majStrength <- read.table("majStrength.txt", sep = '|')
	pdf(file = 'majStrength.pdf', width = 16, height = 14)
		par(mfrow = c(5, 5))
		for(i in 1:25){
			plot(density(as.numeric(paste(majStrength[, i])), na.rm = TRUE), type = 'n',
				xlab = '',
				ylab = '',
				axes = FALSE,
				main = '')
			axis(1)
			axis(2, at = c(min(density(as.numeric(paste(majStrength[, i])), na.rm = TRUE)$y), max(density(as.numeric(paste(majStrength[, i])), na.rm = TRUE)$y)), labels = c(0, 'Max'))
			polygon(density(as.numeric(paste(majStrength[, i])), na.rm = TRUE), col = rgb(1, 0, 0, .4), border = rgb(1, 0, 0, .5), lwd = 2)
			mtext(paste('Majority Margin in Congress', 83+i), line = 2)
		}
	dev.off()

	comNames <- c("Agriculture","Appropriations","Armed Services", "Banking, Finance, and Urban Affairs","Budget","Education",
		"Energy and Commerce","Foreign Affairs","Government Operations", "Judiciary","Natural Resources","Public Works",
		"Science","Ways and Means")

	committee <- read.table("committee.txt", sep = '|')
	pdf(file = 'committee.pdf', width = 16, height = 14)
		par(mfrow = c(4, 4))
		for(i in 1:16){
			plot(density(as.numeric(paste(committee[, i])), na.rm = TRUE), type = 'n',
				xlab = '',
				ylab = '',
				axes = FALSE,
				main = '')
			axis(1)
			axis(2, at = c(min(density(as.numeric(paste(committee[, i])), na.rm = TRUE)$y), max(density(as.numeric(paste(committee[, i])), na.rm = TRUE)$y)), labels = c(0, 'Max'))
			polygon(density(as.numeric(paste(committee[, i])), na.rm = TRUE), col = rgb(1, 0, 0, .4), border = rgb(1, 0, 0, .5), lwd = 2)
			mtext(comNames[i], line = 2, cex = 1.25)
		}
	dev.off()

	names <- read.table("names.txt", sep = '|')
	catch <- rep(NA, (dim(names)[1]*dim(names)[2]))
	for(i in 1:dim(names)[2]){
		catch <- c(catch, as.numeric(paste(names[, i])))
	}
	pdf(file = 'names.pdf')
			plot(density(catch, na.rm = TRUE), type = 'n',
				xlab = '',
				ylab = '',
				axes = FALSE,
				main = '')
			axis(1)
			axis(2, at = c(min(density(catch, na.rm = TRUE)$y), max(density(catch, na.rm = TRUE)$y)), labels = c(0, 'Max'))
			polygon(density(catch, na.rm = TRUE), col = rgb(1, 0, 0, .4), border = rgb(1, 0, 0, .5), lwd = 2)
			mtext('Representative level effects', line = 2, cex = 1.25)
	dev.off()

## assess fit for out of prediction exercise ##
	
	data104 <- read.table('~/merged.txt', sep = '|')
	data104 <- data104[data104$maj == 1 & data104$congress > 103, ]
	data104$outlierDir2 <- data104$outlierDir^2
	majMinSims <- read.table('~/simMaj.txt', sep = '|')
	fit <- rep(NA, length(majMinSims[, 1]))
	minDiff <- c()
	majDiff <- c()

	for(i in 1:length(majMinSims[, 1])){
		predMaj <- majMinSims[i, 1] + 
			majMinSims[i, 2] + 
			majMinSims[i, 3]*data104$outlierDir + 
			majMinSims[i, 4]*data104$outlierDir2 + 
			majMinSims[i, 5]*data104$member + 
			majMinSims[i, 6]*data104$comStaffInd +
			majMinSims[i, 7]*data104$majStack +
			majMinSims[i, 8]*data104$rankCenteredWn +
			majMinSims[i, 9]*data104$dimension +
			majMinSims[i, 10]*data104$outlierDir*data104$member + 
			majMinSims[i, 11]*data104$outlierDir2*data104$member + 
			majMinSims[i, 12]*data104$outlierDir + 
			majMinSims[i, 13]*data104$outlierDir2 + 
			majMinSims[i, 14]*data104$member + 
			majMinSims[i, 15]*data104$comStaffInd +
			majMinSims[i, 16]*data104$majStack +
			majMinSims[i, 17]*data104$outlierDir*data104$member + 
			majMinSims[i, 18]*data104$outlierDir2*data104$member

		predMin <- majMinSims[i, 1] + 
			majMinSims[i, 3]*data104$outlierDir + 
			majMinSims[i, 4]*data104$outlierDir2 + 
			majMinSims[i, 5]*data104$member + 
			majMinSims[i, 6]*data104$comStaffInd +
			majMinSims[i, 7]*data104$majStack +
			majMinSims[i, 8]*data104$rankCenteredWn +
			majMinSims[i, 9]*data104$dimension +
			majMinSims[i, 10]*data104$outlierDir*data104$member + 
			majMinSims[i, 11]*data104$outlierDir2*data104$member
		
		majDiff[i] <- mean(abs(predMaj - data104$errorRateIndWn))
		minDiff[i] <- mean(abs(predMin - data104$errorRateIndWn))
		fit[i] <- mean(abs(predMaj - data104$errorRateIndWn)) < mean(abs(predMin - data104$errorRateIndWn))
		cat('number', i, 'of', length(majMinSims[, 1]), 'complete...\n')
	}
	
	summary(majDiff)
	summary(minDiff)
	summary(as.numeric(fit))
	cat('Script complete...\nI am done.\nDone.\nDONE!!!')

## descriptives ##

	round(mean(data$errorRateIndWn), 3)
	round(mean(data$maj), 3)
	round(mean(data$outlierDir), 3)
	round(mean(data$member), 3)
	round(mean(data$comStaffInd), 3)
	round(mean(data$majStack), 3)
	round(mean(data$rankCenteredWn), 3)
	round(mean(data$dimension), 3)
	round(mean(data$majStrengthInd), 3)

	round(sd(data$errorRateIndWn), 3)
	round(sd(data$maj), 3)
	round(sd(data$outlierDir), 3)
	round(sd(data$member), 3)
	round(sd(data$comStaffInd), 3)
	round(sd(data$majStack), 3)
	round(sd(data$rankCenteredWn), 3)
	round(sd(data$dimension), 3)
	round(sd(data$majStrengthInd), 3)
